*Samuels and Teele (2020) Replication 
*2020. Samuels, David and Dawn Teele. “New Medium, Same Story? The Gender Gap in Book Publishing.” PS: Political Science and Politics. 
*This dataset is anonymized. If you would like titles, author names, etc., please email teele.academic@gmail.com

cd "./foldername/"  /* Replace with the full path to the directory holding the replication data */



************************
** Sample size information cited in text
************************
* Load the anonymized all-presses dataset.
use "Samuels_Teele_allpresses_anonymous.dta", clear

* Total number of titles; the original run recorded 25,898.
count
* Distribution of uni, missing values included; the original run recorded
* 8250 uni-press titles.
tabulate uni, missing

* Books written only by men (solo man or all-male team).
* The numerator and denominator below are the counts from the original run.
count if man_only==1 | all_male==1
display 17909/25898  /* Books only men 69% */

* Books written only by women (solo woman or all-women team).
count if woman_only==1 | all_women==1
display 5065/25898   /* Books only women 19.5% */

************************
** Table 1
************************

* Cross-tabulate uni against each authorship-type indicator in turn.
foreach kind in man_only all_male woman_only all_women coed {
    tabulate uni `kind'
}

* Same cross-tab for the any-woman indicator, with row and column percentages.
tabulate uni anywoman, column row
* share any woman = 30.44
	


*********************************
* Figure 1: Line graph Authorship type over time
*********************************
use "Samuels_Teele_allpresses_anonymous.dta", clear

* Keep only university presses and drop the "irrelevant" subfield (code 5).
keep if unipress==1 & subfield!=5

count

* Work on a yearly collapse; the title-level data is restored at the end.
preserve

    * Data are not complete for 2016, so stop at 2015.
    keep if year<2016

    * Yearly totals of authors and women authors, plus the yearly share of
    * titles in each authorship category.
    collapse (sum) noauthors nwomen (mean) coed all_male woman_only man_only all_women, by(year)

    * Convert each share to a percentage rounded to a whole number.
    foreach share in all_male woman_only man_only all_women coed {
        replace `share' = 100 * round(`share', .01)
    }

    * One line per authorship category; the lines are labelled by text
    * annotations placed in the plot region instead of a legend.
    twoway (line man_only year)   ///
           (line all_male year)   ///
           (line woman_only year) ///
           (line all_women year)  ///
           (line coed year),      ///
           yscale(range(0 70)) ylab(0 (10) 70) ///
           scheme(tufte) name(g4, replace) legend(off) ///
           text(68 2005 "Solo Man") ///
           text(24 2005 "Solo Woman") ///
           text(15 2005 "Male Team") ///
           text(9 2005 "Coed Team") ///
           text(4.5 2014 "Woman Team") ///
           xtitle("")

    graph export "Figure1.eps", replace

    * Share of women over time.
    generate percentwomen = nwomen/noauthors
    lowess percentwomen year, mlabel(percentwomen)
    * Text cited claim "The total share of women among authors rose by a third between 2004 and 2015, from 21% to nearly 28%."

restore

***********************************
* ANALYSIS of university presses and subfield coauthorship patterns.
***********************************
use "Samuels_Teele_allpresses_anonymous.dta", clear

* Keep only university presses.
keep if unipress==1

* Table 2: subfield distribution, missing values included.
tabulate subfield, missing
display 8141-7774

* One row per subfield: total authors, total women authors, and the mean of
* the any-woman indicator.
collapse (sum) noauthors nwomen (mean) anywoman, by(subfield)

* Share of women among all authors in each subfield.
generate percentwomen = nwomen/noauthors
summarize percentwomen
* .27 women average authors

* Build a text label that joins the subfield code and its whole-number
* percentage of women.
generate p = round(percentwomen*100, 1)
tostring p, replace
egen name = concat(subfield p), punct(" ")

* Number of men authors, by subtraction.
generate nmen = noauthors - nwomen

***********************************
* Table 2: Summary statistics by subfield, university presses.
***********************************
list

***********************************
* Figure 2: Subfield over time
***********************************

use "Samuels_Teele_allpresses_anonymous.dta", clear

* University presses only.
keep if unipress==1

* Drop subfield codes 5 and 6; only codes 1-4 are labelled and plotted below.
drop if subfield==5 | subfield==6

* NOTE(review): this cutoff also removes 2015, whereas the other sections of
* this file keep titles through 2015 -- confirm this is intended.
keep if year<2015

* Subfield-by-year totals of authors and women authors.
collapse (sum) noauthors nwomen, by(subfield year)

* Percentage of women among authors, rounded to a whole number, and its
* average over years within each subfield.
generate percentwomen = round((nwomen/noauthors)*100, 1)
egen fieldaverage = mean(percentwomen), by(subfield)

label define sub 1 "American" 2 "International Relations" 3 "Comparative Politics" 4 "Political Theory", replace
label values subfield sub

* One panel per subfield: the yearly series plus the subfield average in blue.
twoway (line percentwomen year) ///
       (line fieldaverage year, lcolor(blue)), ///
       by(subfield, note("") legend(off)) ///
       ytitle("Women Among Authors, %") xtitle("")

graph export Figure2.eps, replace

**********************************
* Impact
**********************************

use "Samuels_Teele_allpresses_anonymous.dta", clear

* University-press titles flagged with DidCitation==1.
keep if unipress==1
keep if DidCitation==1

* Drop the irrelevant subfield (code 5).
drop if subfield==5

* Keep Academic Political Scientists and non-reprints
keep if polisci==1

* Single authorship-type code built from the five indicator variables; the
* labels below attach names to the resulting group numbers.
egen type = group(man_only all_male woman_only all_women coed)
label define type 5 "Solo Man" 4 "Male Team" 3 "Solo Wom" 2 "Fem Team" 1 "Coed Team"
label values type type

tabulate type

* Numeric version of the string rank variable, with code 4 recoded to 0.
* This runs before the drops below, so the codes come from every rank value
* still present at this point.
encode rank, gen(Rank)
recode Rank (4=0)

* Exclude reprints and paperbacks.
drop if rank=="Reprint"
drop if paperback==1

tabulate rank, missing /* table 3 */


******************
*Figure 3: Citations summary by coauthorship
******************

* Work on a per-type collapse; the title-level data is restored afterwards
* because the t-tests that follow need it.
preserve

    * Mean citations per authorship type, plus the number of non-missing
    * observations behind each mean.
    collapse (mean) googlecite_6yr googlecite_all google_outfor10 ///
             (count) n_yrsix=googlecite_6yr n_all=googlecite_all n_outten=google_outfor10, ///
             by(type)

    list

    * Two bars per authorship type. The legend labels only the two plotted
    * series; the former third label ("Scopus") matched no plotted variable
    * and has been removed.
    graph bar (mean) googlecite_6yr google_outfor10, over(type) ///
        ytitle("Average Google Citations") ///
        legend(label(1 "6 years post publication") label(2 "Books over 10 years old") row(1)) ///
        bar(1, color(gs5)) bar(2, color(gs10)) ///
        blabel(bar, format(%3.0f) position(inside) color(white))

    * "replace" matches every other export in this file and lets the script
    * be re-run when Figure3.eps already exists.
    graph export Figure3.eps, replace

restore

* Pairwise difference-of-means tests across authorship types, for citations
* 6 years post publication and for books older than 10 years.
* Every ordered pair (a, b) is visited, so each unordered pair is tested twice.

forvalues a = 1/5 {
    forvalues b = 1/5 {

        if `b' == `a' {
            * A t-test needs two distinct authorship categories.
            display "*can't do it*"
        }
        else {
            display "********6 year********"
            ttest googlecite_6yr if inlist(type, `b', `a'), by(type)
            display "********OUT TEN ********"
            ttest google_outfor10 if inlist(type, `b', `a'), by(type)
        }

    }
}

*****************************
* Figure 4: Rank, gender, and citations
*****************************

* Value labels for the first_woman indicator.
label define wom 0 "Men" 1 "Women", replace
label values first_woman wom

* Box plot of 6-year citations by first_woman within each rank.
* NOTE(review): rank1 is not created in this file; presumably it ships with
* the dataset and controls the ordering of ranks -- confirm.
graph box googlecite_6yr if rank!="Reprint", ///
    over(first_woman) over(rank, sort(rank1)) ///
    ytitle("Average Google Citations")

graph export Figure4.eps, replace

* Difference-of-means p-values within each rank.
* NOTE(review): the test groups by confirm1 while the figure groups by
* first_woman; confirm1 is not defined in this file -- verify that it is the
* intended grouping variable.
foreach level in Assistant Associate Non-TT Full {

    display "********`level'********"
    ttest googlecite_6yr if rank=="`level'", by(confirm1)

}


******************************************
* Figure Appendix 1: horizontal bar, share of women by press.
******************************************
use "Samuels_Teele_allpresses_anonymous.dta", clear

* University presses, titles through 2015.
keep if unipress==1
keep if year <=2015

* Work on a per-press collapse; the title-level data is restored afterwards.
preserve

    collapse (sum) noauthors nwomen (mean) all_male woman_only man_only all_women, by(Press)

    * Share of women among all authors at each press.
    generate percentwomen = nwomen/noauthors
    summarize percentwomen
    * .2689029 women average authors

    * Presses sorted by share; the red reference line at .27 is the rounded
    * average recorded above.
    graph hbar percentwomen, ///
        over(Press, sort(1) label(labsize(vsmall))) ///
        scheme(tufte) yline(.27, lcolor(red)) ///
        ytitle("Share of Women Among All Authors") ///
        yscale(titlegap(*10)) name(g3, replace)

restore

* Exports the graph drawn above.
graph export FigureA1.eps, replace

*********************************
*Figure Appendix 2: Women within the presses over time.  
*********************************
use "Samuels_Teele_allpresses_anonymous.dta", clear


* University presses, titles through 2015.
keep if year <=2015
keep if unipress==1

* Press-by-year totals. Only the two sums are used below; the averaged
* indicators are discarded by the keep that follows, so the loop that used to
* rescale them to percentages has been removed.
collapse (sum) noauthors nwomen  (mean) coed all_male woman_only man_only all_women , by(Press year)

* Share of women among authors for each press-year.
gen percentwomen=nwomen/noauthors
keep Press year percentwomen

* Panel label: press identifier joined with its average share of women over
* years, expressed as a whole-number percentage.
egen avgwomen=mean(percentwomen), by(Press)
replace avgwomen=round(avgwomen*100, 1)
tostring avgwomen, replace
egen name=concat(Press avgwomen), punct( " ")
encode name, gen(name1)

* One panel per press, ordered by the encoded label.
* NOTE(review): percentwomen is plotted as a share between 0 and 1 although
* the axis title says "%"; Figure 2 plots the 0-100 scale under the same
* title -- confirm which scale is intended here.
xtset name1 year
xtline percentwomen, ytitle("Women Among Authors, %") xtitle("") note("")

graph export FigureA2.eps, replace
*byopts(rescale *1.2)

* No restore here: this section never calls preserve, so a trailing restore
* would stop the do-file with an error.
 
